#
# More Donors, More Democracy
#
# Author: Sebastian Ziaja
# Date: 27 October 2018
#
# Replication script for
# Online Appendix B: The instrumental variable approach
#
#


workdir <- ""
# workdir <- ifelse(basename(getwd()) == "aiddem_new", "", "../")
source(paste0(workdir, "scripts/load_packages.R"))
source(paste0(workdir, "scripts//extract.custom.R"))
source(paste0(workdir, "scripts//functions.R"))
source(paste0(workdir, "scripts//formulas.R"))
source(paste0(workdir, "scripts//variable_labels.R"))

load(paste0(workdir, "data/aiddem_datasets.RData"))
ddm <- read_rds(paste0(workdir, "data/temp/aiddyad_m.rds"))
dd <- read_rds(paste0(workdir, "data/temp/aiddyad.rds"))
daid <- readRDS("data//daid.rds")

dX  <- d1
dXm <- d1m

# Functions ---------------------------------------------

trg <- function(x, clevel = .95, ...)
{
        screenreg(x
                  , ci.force = TRUE
                  , ci.force.level = clevel
                  , digits = 2
                  , controls = TRUE
                  , ...)
}


# Empirical validation -----------------------------------

## Scatterplots -----------------------------------


ds1 <-
    dd %>% group_by(don, year) %>%
    summarise(d = sum(Sgh),
              w = mean(wip, na.rm = TRUE))
cor.test(ds1$d, ds1$w)
scatterplot(ds1$w, ds1$d
            , xlab = "Share of female legislators"
            , ylab = "Number of recipients of dem. aid"
            )

ds2 <-
    dd %>% group_by(rec, year) %>%
    summarise(d = sum(Sgh),
              w = mean(Sgwh, na.rm = TRUE))
cor.test(ds2$d, ds2$w)
scatterplot(ds2$w, ds2$d
            , xlab = "Propensity to receive dem. aid, 1994-2013"
            , ylab = "Number of democracy donors"
            , jitter = list(y = 1)
            )

cor.test(dX$ZwvCMgwh94, dX$CMgnh)
scatterplot(dX$ZwvCMgwh94
            , dX$CMgnh
            , jitter = list(x = 0, y = 1)
            , ylab = "Number of democracy donors (jittered)"
            , xlab = "Z (female legislators * prop. dem. aid)"
            )

dXexhiw <-
    dX %>%
    anti_join(ds2 %>% filter(w > .6) %>% select(cc = rec, year),
               by = c("cc", "year"))

runmodel(
    # gsub("l.pop.log.r", "pop_log10_l",
    bf
    # , fixed = F)
    , data = dXexhiw %>% filter(smpl == 1) #%>% filter(smplR == 1)
) -> r

trg(r
    , custom.coef.map =
        list(
            "l.CMgnh" = vln[2],
            "l.CMgap.log" = vla[2],
            "`l.CMgnh(fit)`" = vlne[2],
            "`I(l.CMgap.log * 10)(fit)`" = vlae[2],
            "`IA_l(fit)`" = vlies[2],
            "l.CMenh" = vln[3],
            "l.CMeap.log" = vla[3],
            "l.pop.log.r" = vlctrls[1],
            "l.gdpcap.log.r" = vlctrls[2],
            "l.war25" = vlctrls[3]
        )
    , caption = "Excluding high-propensity cases"
    , custom.note = gsub("; ", paste0(";", get(paste0("brk", length(r) + 1))), cnt)
    )


## Reduced form estimates -----------------------------------------

r.red <- vector("list", 1)
r.red[[1]] <- felm(v2x.polyarchy.n ~ I(l.ZwvCMgwh94/10) |
                    cnamef + periodf | 0 | cnamef
                , data = dX %>% filter(smpl == 1))
r.red[[2]] <- felm(v2x.polyarchy.n ~ I(l.ZwvCMgwh94/10) +
                    l.pop.log.r + l.gdpcap.log.r + l.war25 |
                    cnamef + periodf | 0 | cnamef
                , data = dX %>% filter(smpl == 1))

screenreg(r.red
    , custom.coef.names = c(paste0(vlzps[2], "* 10"), vlctrls)
    # , reorder.coef = c(1:2, 6:9, 3:5)
        , ci.force = TRUE
        , caption.above = TRUE
        , digits = 2
        , controls = FALSE
)

## Temporal variation of the instrument ----------------------

# Temporal:
by(dX$ZwvCMgwh94, dX$cc, sd) -> vart
# Cross-sectional:
by(dX$ZwvCMgwh94, dX$period, sd) -> varc

dtv <-
    dd %>%
    group_by(don) %>%
    arrange(don, year) %>%
    mutate(
        donor = countrycode(don, "iso3c", "country.name"),
        # wipdiff = wip - lag(wip) # does not make sense;
        # must be aligned with electoral periods
        ) %>%
    ungroup %>%
    select(donor, year, wip) %>%
    unique

ggplot(data = dtv, aes(x = year, y = wip)) +
    geom_line() +
    facet_wrap(~ donor) +
    theme_bw()+
    ylab("Share of female legislators") +
    xlab("Year")


## Cross-sectional variation of the instrument --------------------

dpv <-
    dd %>%
    left_join(dX %>% select(rec = cc, year, smpl), by = c("rec", "year")) %>%
    filter(smpl == 1) %>%
    mutate(cn = countrycode(rec, "iso3c", "country.name",
                            custom_match = c("XKX" = "Kosovo")),
           cn = factor(cn),
           cn = factor(cn, levels = rev(levels(cn)))) %>%
    select(cn, don, Sgwh
           ) %>%
    unique

cnl <- dpv %>% select(cn) %>% unique %>% unlist %>% as.character %>% sort

cnl1 <- cnl[1:floor(length(cnl) / 2)]
cnl2 <- cnl[ceiling(length(cnl) / 2):length(cnl)]

p <- ggplot(dpv %>% filter(cn %in% cnl1), aes(x = cn, y = Sgwh))

p + geom_boxplot() +
     coord_flip() +
    theme_bw()+
    xlab("Recipient country") +
    ylab("Propensity to receive democracy aid, all donors")


p <- ggplot(dpv %>% filter(cn %in% cnl2), aes(x = cn, y = Sgwh))

p + geom_boxplot() +
     coord_flip() +
    theme_bw() +
    xlab("Recipient country") +
    ylab("Propensity to receive democracy aid, all donors")


# Discussing the assumptions ---------------------------------------

## Relaxing the exclusion assumption ---------------------------

# See "conley_endogeneity.do" for the analysis of the Confidence interval
# conditional on increasing endogeneity


## Is democracy aid facilitated by female legislators different? -------------

# Density plot:

dsub <-
    daid %>%
    filter(com > 0) %>%
    select(don, year, com, wipquant,
           civsoc, compart, grass, otherpart, elections,
           hr, freeflow, womorg) %>%
    melt(id = c("don", "year", "com", "wipquant")) %>%
    filter(value == 1, !is.na(wipquant)) %>%
    select(-value) %>%
    group_by(don, year, wipquant, variable) %>%
    summarise(com = sum(com, na.rm = TRUE)) %>%
    ungroup %>%
    group_by(don, year) %>%
    mutate(comtotal = sum(com, na.rm = TRUE),
           comshare = 100 * com / comtotal) %>%
    ungroup %>%
    mutate(wipqf = as.factor(wipquant))

sld <- as.data.frame(sectlabtab)
names(sld) <- c("label", "variable")

dsub <- left_join(dsub, sld, by = "variable")

ggplot(dsub,
       aes(x = comshare, fill = wipqf)) +
    geom_density(alpha = 0.25) +
    coord_cartesian(ylim = c(0, .075)) +
    facet_wrap(~ label) +
    xlab("Percentage of overall civil society aid") +
    ylab("Density") +
    guides(fill = guide_legend(title = "Female legislator\nshare quartiles")) +
    theme_bw() +
    theme(legend.position=c(.83,.15))
    # + ggsave(paste0(workdir, "graphs/demaid_shares_by_wip_quartiles.pdf"))

# KS test:

sect <- sectlabtab[, 2]
rks <- data.frame("KS test statistic" = rep(NA, length(sect)),
                  "P-value" = rep(NA, length(sect))
                  , row.names = gsub("\\n", " ", sectlabtab[, 1]) %>%
                      gsub("- ", "", .)
)
for(i in 1:length(sect)) {
    ks <- ks.test(
        dsub %>% filter(wipquant == 1, variable == sect[i]) %>% select(comshare) %>% unlist,
        dsub %>% filter(wipquant == 4, variable == sect[i]) %>% select(comshare) %>% unlist
        # dsub %>% filter(wipquant < 3, variable == sect[i]) %>% select(comshare) %>% unlist,
        # dsub %>% filter(wipquant > 2, variable == sect[i]) %>% select(comshare) %>% unlist
    )
    rks[i, 1] <- ks$statistic
    rks[i, 2] <- ks$p.value
}

dks <- dgender %>% filter(smpl == 1) %>% select(xh = Xg_gq4,
                                                xl = Xg_gq1)
ksd <- ks.test(dks$xh, dks$xl)
rks[nrow(rks) + 1, 1] <- ksd$statistic
rks[nrow(rks), 2] <- ksd$p.value
row.names(rks)[nrow(rks)] <- "Overall democracy aid"
names(rks) <- c("KS test statistic", "P-value")

rks %>% round(., 3)


# Regressions:

r <- vector("list", 1)
r[[1]] <- felm(v2x.polyarchy.n ~ Xg_gq12_log10_l +
                   Xg_gq34_log10_l |
                   cnamef + periodf | 0 | cnamef
               , data = dgender %>% filter(smpl == 1))
r[[2]] <- felm(v2x.polyarchy.n ~
                   Xg_gq12_log10_l + Xg_gq34_log10_l +
                   l.pop.log.r + l.gdpcap.log.r + l.war25 |
                   cnamef + periodf | 0 | cnamef
               , data = dgender %>% filter(smpl == 1))
r[[3]] <- felm(v2x.polyarchy.n ~ Sgh_gq12 +
                   Sgh_gq34 |
                   cnamef + periodf | 0 | cnamef
               , data = dgender %>% filter(smpl == 1))
r[[4]] <- felm(v2x.polyarchy.n ~
                   Sgh_gq12 + Sgh_gq34 +
                   l.pop.log.r + l.gdpcap.log.r + l.war25 |
                   cnamef + periodf | 0 | cnamef
               , data = dgender %>% filter(smpl == 1))


trg(r
    , caption = "Low vs. high female-legislator aid and donors"
    , custom.coef.map = list(
        "Xg_gq12_log10_l" = paste0(vlas[2], ", low female legislator share"),
        "Xg_gq34_log10_l" = paste0(vlas[2], ", high female legislator share"),
        "Sgh_gq12" = paste0(vlns[2], ", low female legislator share"),
        "Sgh_gq34" = paste0(vlns[2], ", high female legislator share")

    )
)

# Testing for spurious correlation --------------------------

## Trends by propensity groups ----------------------------

dmpm <-
    left_join(
        dd %>% group_by(rec, year) %>%
            summarise(pr = mean(Sgwh, na.rm = TRUE),
                      nd = sum(Sgh, na.rm = TRUE),
                      wip = mean(wip, na.rm = TRUE),
                      wipg = mean(wip * Sgh, na.rm = TRUE)),
        d1 %>% select(rec = cc, year, v2x.polyarchy.n, smpl),
        by = c("rec", "year"))

quantilenum <- cut2(dmpm$pr, g = 4)
levels(quantilenum) <- 1:4
dmpm$prq <- quantilenum

dp <-
    dmpm %>%
    filter(smpl == 1) %>%
    select(-smpl) %>%
    group_by(prq, year) %>%
    select(-rec) %>%
    summarise_all(funs(mean(., na.rm = TRUE))) %>%
    ungroup %>%
    na.omit

par(mfrow = c(1, 3), mar = c(5, 3, 4, 1))
dnp1 <-
    dd %>%
    select(don, year, wip) %>%
    unique %>%
    select(-don)
dnp1 %>% plot(., main = "Women in\nparliament",
         ylab = "", xlab = "Year")
lines(lowess(dnp1 %>% select(year, wip) %>% na.omit), col="red", lwd = 2)

plot(dp %>% filter(prq == 1) %>% select(year, nd), type = "l",
     main = "Average number of \ndemocracy donors\nby propensity group",
     ylab = "", xlab = "Year",
     ylim = c(min(dp$nd), max(dp$nd)))
for(i in 2:4) {
    lines(dp %>% filter(prq == i) %>% select(year, nd), lty = i)
}
legend(
    "topleft",
    legend = c("Least regular", "2nd quartile",
               "3rd quartile", "Most regular"),
    lty = 1:4
)

plot(dp %>% filter(prq == 1) %>% select(year, v2x.polyarchy.n), type = "l",
     main = "Average democracy score\nby propensity group",
     ylab = "", xlab = "Year",
     ylim = c(min(dp$v2x.polyarchy.n), max(dp$v2x.polyarchy.n)))
for(i in 2:4) {
    lines(dp %>% filter(prq == i) %>% select(year, v2x.polyarchy.n), lty = i)
}
legend(
    "bottomright",
    legend = c("Least regular", "2nd quartile",
               "3rd quartile", "Most regular"),
    lty = 1:4
)


## State-specific time trends in first stage -----------------------------

dX <-
    dX %>%
    left_join(
        dmp <-
            dd %>%
            group_by(rec, year) %>%
            summarise(pr = mean(Sgwh, na.rm = TRUE)) %>%
            ungroup %>%
            rename(cc = rec),
        by = c("cc", "year"))

quantilenum <- cut2(dX$pr, g = 4)
levels(quantilenum) <- 1:4
dX$prq <- quantilenum
dX <-
    dX %>%
    mutate(prqp1 = if_else(prq == 1, period, 0),
           prqp2 = if_else(prq == 2, period, 0),
           prqp3 = if_else(prq == 3, period, 0),
           prqp4 = if_else(prq == 4, period, 0))

r2 <- vector("list", 4)

r2[[1]] <- felm(v2x.polyarchy.n ~ 1 |
                      cnamef |
                      (l.CMgnh ~ prqp1 + prqp2 + prqp3 + prqp4 + l.ZwvCMgwh94) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[2]] <- felm(v2x.polyarchy.n ~
                    l.pop.log.r + l.gdpcap.log.r + l.war25 |
                      cnamef |
                      (l.CMgnh ~ prqp1 + prqp2 + prqp3 + prqp4 + l.ZwvCMgwh94) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[3]] <- felm(v2x.polyarchy.n ~
                    l.pop.log.r + l.gdpcap.log.r + l.war25
                    | cnamef |
                      (I(l.CMgap.log * 10) ~ prqp1 + prqp2 + prqp3 + prqp4 + l.ZwvCMgwh94) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[4]] <- felm(v2x.polyarchy.n ~ l.pop.log.r + l.gdpcap.log.r + l.war25
                    | cnamef |
                      (IA_l ~ prqp1 + prqp2 + prqp3 + prqp4 + l.ZwvCMgwh94) |
                      cnamef, data = dX %>% filter(smpl == 1))


trg(r2
    , custom.coef.names =
        c(
            vlnes[2]
            , paste0(vlaes[2], " * 10")
            , vlies[2]
        )
    , omit.coef = "pop|gdp|war"
)


## Randomized placebo tests ----------------------------------------

# See "B1_spurious_placebo_test.R"

# Distance as alternative recipient instrument ---------------------------

r2 <- vector("list", 4)

r2[[1]] <- felm(v2x.polyarchy.n ~ 1 |
                      cnamef + periodf |
                      (l.CMgnh ~ l.ZwvDistCM) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[2]] <- felm(v2x.polyarchy.n ~
                    l.pop.log.r + l.gdpcap.log.r + l.war25 |
                      cnamef + periodf |
                      (l.CMgnh ~ l.ZwvDistCM) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[3]] <- felm(v2x.polyarchy.n ~
                    l.pop.log.r + l.gdpcap.log.r + l.war25
                    | cnamef + periodf |
                      (I(l.CMgap.log * 1) ~ l.ZwvDistCM) |
                      cnamef, data = dX %>% filter(smpl == 1))
r2[[4]] <- felm(v2x.polyarchy.n ~
                    l.pop.log.r + l.gdpcap.log.r + l.war25
                    | cnamef + periodf |
                      (IA_l ~ l.ZwvDistCM) |
                      cnamef, data = dX %>% filter(smpl == 1))


screenreg(r2,
       custom.coef.map = list(
            "`l.CMgnh(fit)`" = vlne[2],
            "`I(l.CMgap.log * 1)(fit)`" = vlae[2],
            "`IA_l(fit)`" = vlies[2],
            "l.pop.log.r" = vlctrls[1],
            "l.gdpcap.log.r" = vlctrls[2],
            "l.war25" = vlctrls[3]
       )
        , ci.force = TRUE
        , digits = 2
)

